Index
#!pip install scikit-fda
import os
os.chdir("../../")
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import altair as alt
import random
import statsmodels.api as sm
from skfda.representation.grid import FDataGrid
from skfda.preprocessing.dim_reduction.projection import FPCA
from skfda.exploratory.visualization import FPCAPlot
from sklearn.preprocessing import OneHotEncoder
import skfda
from skfda.ml.regression import LinearRegression
from skfda.representation.basis import FDataBasis, FourierBasis
from skfda.exploratory.depth import IntegratedDepth, ModifiedBandDepth
from skfda.exploratory.visualization import Boxplot
# Import designed-functions
from window_extraction import calculate_window_values, calculate_window_data, Merge_data, align_to_zero, balance_index
from time_series_visualization import plot_all_time_series, plot_all_time_series_and_mean_fpca, plot_all_time_series_in_group
from functionalPCA import fpca_two_inputs, first_component_extraction, bootstrap, create_pc_scores_plots, visualize_regression
from functional_regression import Function_regression, coefficent_visualization
/var/folders/vh/dw36swbx2939r11_2dkm6r4m0000gn/T/ipykernel_51471/3302358765.py:9: DeprecationWarning: The module "projection" is deprecated. Please use "dim_reduction" from skfda.preprocessing.dim_reduction.projection import FPCA
The file paths can be changed depending on where the data is stored.
# Load the raw CSV exports. The six sensor tables are read in one pass, in the
# same order as the target names; the key table is read separately because it
# is the only one that needs date parsing.
(
    sensorA_System1,
    sensorA_System2,
    sensorB_System1,
    sensorB_System2,
    sensorA_System1_missing,
    sensorA_System2_missing,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../../../RawData/System1_SensorA.csv",
        "../../../../RawData/System2_SensorA.csv",
        "../../../../RawData/System1_SensorB.csv",
        "../../../../RawData/System2_SensorB.csv",
        "../../../../RawData/SensorA_System1_missing values.csv",
        "../../../../RawData/SensorA_System2_missing values.csv",
    )
)
# Per-test metadata; 'DateTime' is parsed into datetimes on load.
keyByTestID = pd.read_csv(
    "../../../../RawData/Key by TestID.csv",
    parse_dates=['DateTime'],
)
# Transpose dataset to make columns as timestamps and rows as tests
def _transpose_by_test(raw):
    """Return ``raw`` transposed so that every row describes one test.

    Parameters
    ----------
    raw : pandas.DataFrame
        Table as read from the CSV, one column per test.

    Returns
    -------
    pandas.DataFrame
        Transposed table whose first column is named ``TestID`` (cast to
        ``int``) and whose remaining column labels are taken from the first
        transposed row.
    """
    transposed = raw.T.reset_index()
    # After the transpose, row 0 carries the values of the original first
    # column; promote that row to be the header.
    transposed.columns = transposed.iloc[0]
    # The first header cell is the original first column's name; the values
    # below it are the original column labels, i.e. the test identifiers.
    transposed = transposed.rename(columns={transposed.columns[0]: 'TestID'})
    # Row 0 is now duplicated in the header, so remove it from the data.
    transposed = transposed.drop(0)
    transposed['TestID'] = transposed['TestID'].astype(int)
    return transposed


# Sensor A
A1_transposed = _transpose_by_test(sensorA_System1)
A2_transposed = _transpose_by_test(sensorA_System2)
A1_missing_transposed = _transpose_by_test(sensorA_System1_missing)
A2_missing_transposed = _transpose_by_test(sensorA_System2_missing)
# Sensor B
B1_transposed = _transpose_by_test(sensorB_System1)
B2_transposed = _transpose_by_test(sensorB_System2)
# Patch Sensor A with the "missing values" tables: any test that also appears
# in the patch table is removed from the main table first, then the patch rows
# are appended, so the patch version of a test is the one that is kept.
_a1_is_patched = A1_transposed['TestID'].isin(A1_missing_transposed['TestID'])
A1_transposed_mid = A1_transposed[~_a1_is_patched]
A1_transposed = pd.concat([A1_transposed_mid, A1_missing_transposed], axis=0)

_a2_is_patched = A2_transposed['TestID'].isin(A2_missing_transposed['TestID'])
A2_transposed_mid = A2_transposed[~_a2_is_patched]
A2_transposed = pd.concat([A2_transposed_mid, A2_missing_transposed], axis=0)
# Relabeling System Values: "System 2A" and "System 2B" are both treated as "System 2".
keyByTestID["System"] = keyByTestID["System"].replace({"System 2A": "System 2", "System 2B": "System 2"})

# Create new column to fill fluid temperature NA's
# Note: Fluid temperature: If specified, take as the temperature of the sample fluid.
# The rest of the system temperature can be taken as ambient temperature.
keyByTestID['Fluid_Temperature_Filled'] = keyByTestID['Fluid Temperature'].combine_first(keyByTestID['AmbientTemperature'])

# Binning
# Categorize 'FluidType' into Blood and Aqueous: names starting with 'Eurotrol' are 'Aqueous'.
# NOTE(review): a missing FluidType yields NaN from .str.startswith, which np.where
# treats as truthy, so such rows are labelled 'Aqueous' - confirm this is intended.
keyByTestID['FluidTypeBin'] = np.where(keyByTestID['FluidType'].str.startswith('Eurotrol'), 'Aqueous', 'Blood')

# Categorize 'AgeOfCardInDaysAtTimeOfTest' into bins.
# include_lowest=True makes the first interval closed on the left, matching its
# '[0-9]' label; without it pd.cut uses (0, 9] and an age of exactly 0 becomes NaN.
keyByTestID["CardAgeBin"] = pd.cut(
    keyByTestID["AgeOfCardInDaysAtTimeOfTest"],
    bins=[0, 9, 28, 56, 84, 112, 140, 168, 196, 224, 252],
    labels=['[0-9]', '(9-28]', '(28-56]', '(56-84]', '(84-112]', '(112-140]',
            '(140-168]', '(168-196]', '(196-224]', '(224-252]'],
    include_lowest=True,
)

# Categorize 'Fluid_Temperature_Filled' into bins (the -1 lower edge keeps 0 inside the first bin).
keyByTestID["FluidTempBin"] = pd.cut(
    keyByTestID["Fluid_Temperature_Filled"],
    bins=[-1, 20, 25, 100],
    labels=['Below 20', '20-25', 'Above 25'],
)

# Filtering successful tests.
# .copy() detaches the result from the unfiltered frame so that the column
# assignments made on keyByTestID afterwards do not operate on a slice
# (SettingWithCopyWarning).
keyByTestID = keyByTestID[keyByTestID['ReturnCode'].isin(['Success', 'UnderReportableRange'])].copy()
# Merge dataset with keyByTestID and delete unmatched tests
keyByTestID['TestID'] = keyByTestID['TestID'].astype(int)
keyByTestID['System'] = keyByTestID['System'].astype(str)

# Row selectors on the metadata table. Everything that is not 'System 1' is
# treated as System 2.
_is_sensor_a = keyByTestID['Sensor'] == 'Sensor A'
_is_sensor_b = keyByTestID['Sensor'] == 'Sensor B'
_is_system_1 = keyByTestID['System'] == 'System 1'

# Sensor A: inner-join metadata with the per-test rows, then keep only the
# matched tests in the transposed table.
A1_keyByTestID = keyByTestID[_is_sensor_a & _is_system_1]
A1_Merged = pd.merge(A1_keyByTestID, A1_transposed, how='inner', on=['TestID'])
A1_transposed = A1_transposed[A1_transposed['TestID'].isin(A1_Merged['TestID'])]

A2_keyByTestID = keyByTestID.loc[_is_sensor_a & ~_is_system_1]
A2_Merged = pd.merge(A2_keyByTestID, A2_transposed, how='inner', on=['TestID'])
A2_transposed = A2_transposed[A2_transposed['TestID'].isin(A2_Merged['TestID'])]

# The raw tables have one column per test; keep only the columns whose label
# (a string) is a matched TestID.
sensorA_System1 = sensorA_System1.loc[:, sensorA_System1.columns.isin(A1_Merged['TestID'].astype(str))]
sensorA_System2 = sensorA_System2.loc[:, sensorA_System2.columns.isin(A2_Merged['TestID'].astype(str))]

# Sensor B: same steps.
B1_keyByTestID = keyByTestID[_is_sensor_b & _is_system_1]
B1_Merged = pd.merge(B1_keyByTestID, B1_transposed, how='inner', on=['TestID'])
B1_transposed = B1_transposed[B1_transposed['TestID'].isin(B1_Merged['TestID'])]

B2_keyByTestID = keyByTestID.loc[_is_sensor_b & ~_is_system_1]
B2_Merged = pd.merge(B2_keyByTestID, B2_transposed, how='inner', on=['TestID'])
# Fixed: this line used to assign to B1_transposed and filter by A2_Merged,
# which replaced the System 1 table with System 2 rows and left B2_transposed
# unfiltered.
B2_transposed = B2_transposed[B2_transposed['TestID'].isin(B2_Merged['TestID'])]

sensorB_System1 = sensorB_System1.loc[:, sensorB_System1.columns.isin(B1_Merged['TestID'].astype(str))]
sensorB_System2 = sensorB_System2.loc[:, sensorB_System2.columns.isin(B2_Merged['TestID'].astype(str))]
# Report (rows, columns) of every merged table.
for _label, _frame in (
    ('A1: ', A1_Merged),
    ('A2: ', A2_Merged),
    ('B1: ', B1_Merged),
    ('B2: ', B2_Merged),
):
    print(_label, _frame.shape)
A1: (3382, 3380) A2: (7743, 3371) B1: (3375, 3380) B2: (7745, 3371)
# Note: Only run once. If not, restart the kernel and run from the beginning again.
# Within each system, keep only the tests that are present for both sensors.
# The A table is filtered first, so the B table is compared against the
# already-reduced A table.
_a1_has_b1 = A1_Merged["TestID"].isin(B1_Merged["TestID"])
A1_Merged = A1_Merged[_a1_has_b1]
_b1_has_a1 = B1_Merged["TestID"].isin(A1_Merged["TestID"])
B1_Merged = B1_Merged[_b1_has_a1]

_a2_has_b2 = A2_Merged["TestID"].isin(B2_Merged["TestID"])
A2_Merged = A2_Merged[_a2_has_b2]
_b2_has_a2 = B2_Merged["TestID"].isin(A2_Merged["TestID"])
B2_Merged = B2_Merged[_b2_has_a2]

for _label, _frame in (
    ('A1: ', A1_Merged),
    ('A2: ', A2_Merged),
    ('B1: ', B1_Merged),
    ('B2: ', B2_Merged),
):
    print(_label, _frame.shape)
A1: (3374, 3380) A2: (7743, 3371) B1: (3374, 3380) B2: (7743, 3371)
# Compute the calibration- and sample-window boundaries for every Sensor A
# test and store them as four new columns on the merged tables.

# Sensor A, System 1
calDelimit, cal_window_size = 11, 8
sampleDelimit, sample_window_size = 15, 5
(
    cal_window_start,
    cal_window_end,
    sample_window_start,
    sample_window_end,
) = calculate_window_values(
    bubble_start=A1_Merged['BubbleDetectTime'],
    sample_start=A1_Merged['SampleDetectTime'],
    calDelimit_input=calDelimit,
    cal_window_size_input=cal_window_size,
    sampleDelimit_input=sampleDelimit,
    sample_window_size_input=sample_window_size,
)
A1_Merged['cal_window_start'] = cal_window_start
A1_Merged['cal_window_end'] = cal_window_end
A1_Merged['sample_window_start'] = sample_window_start
A1_Merged['sample_window_end'] = sample_window_end

# Sensor A, System 2 (different delimiters, same window sizes)
calDelimit, cal_window_size = 9.6, 8
sampleDelimit, sample_window_size = 17.2, 5
(
    cal_window_start,
    cal_window_end,
    sample_window_start,
    sample_window_end,
) = calculate_window_values(
    bubble_start=A2_Merged['BubbleDetectTime'],
    sample_start=A2_Merged['SampleDetectTime'],
    calDelimit_input=calDelimit,
    cal_window_size_input=cal_window_size,
    sampleDelimit_input=sampleDelimit,
    sample_window_size_input=sample_window_size,
)
A2_Merged['cal_window_start'] = cal_window_start
A2_Merged['cal_window_end'] = cal_window_end
A2_Merged['sample_window_start'] = sample_window_start
A2_Merged['sample_window_end'] = sample_window_end
# Sensor B: the sample delimiter depends on the fluid, so the boundaries are
# computed row by row. Fluids whose name starts with 'Eurotrol' use the
# aqueous delimiter, every other fluid the blood delimiter.

# Sensor B, System 1
calDelimit, cal_window_size = 20, 18
sampleDelimit_blood, sampleDelimit_aqueous = 24, 30
sample_window_size = 4
_b1_boundaries = B1_Merged.apply(
    lambda row: calculate_window_values(
        bubble_start=row['BubbleDetectTime'],
        sample_start=row['SampleDetectTime'],
        calDelimit_input=calDelimit,
        cal_window_size_input=cal_window_size,
        sampleDelimit_input=(
            sampleDelimit_aqueous
            if row['FluidType'].startswith('Eurotrol')
            else sampleDelimit_blood
        ),
        sample_window_size_input=sample_window_size,
    ),
    axis=1,
)
# zip(*...) turns the per-row 4-tuples into four column-wise sequences.
(
    B1_Merged['cal_window_start'],
    B1_Merged['cal_window_end'],
    B1_Merged['sample_window_start'],
    B1_Merged['sample_window_end'],
) = zip(*_b1_boundaries)

# Sensor B, System 2
calDelimit, cal_window_size = 18, 18
sampleDelimit_blood, sampleDelimit_aqueous = 30.4, 32.8
sample_window_size = 4
_b2_boundaries = B2_Merged.apply(
    lambda row: calculate_window_values(
        bubble_start=row['BubbleDetectTime'],
        sample_start=row['SampleDetectTime'],
        calDelimit_input=calDelimit,
        cal_window_size_input=cal_window_size,
        sampleDelimit_input=(
            sampleDelimit_aqueous
            if row['FluidType'].startswith('Eurotrol')
            else sampleDelimit_blood
        ),
        sample_window_size_input=sample_window_size,
    ),
    axis=1,
)
(
    B2_Merged['cal_window_start'],
    B2_Merged['cal_window_end'],
    B2_Merged['sample_window_start'],
    B2_Merged['sample_window_end'],
) = zip(*_b2_boundaries)
# Adds TestIDs as index to the values after window extraction
def _extract_windows(merged):
    """Extract the calibration and sample windows of every test in ``merged``.

    Parameters
    ----------
    merged : pandas.DataFrame
        Merged metadata/time-series table with a ``TestID`` column; each row
        is handed as-is to ``calculate_window_data``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(cal_windows, sample_windows)``, one row per row of ``merged`` in
        the same order, both indexed by the integer ``TestID``.
    """
    cal_rows = []
    sample_rows = []
    for position in range(len(merged)):
        cal_window, sample_window = calculate_window_data(merged.iloc[position, :])
        cal_rows.append(cal_window.values)
        sample_rows.append(sample_window.values)

    cal_windows = pd.DataFrame(cal_rows)
    sample_windows = pd.DataFrame(sample_rows)

    # Assign the IDs by position. The window frames carry a fresh 0..n-1 index
    # while ``merged`` keeps the labels left over from earlier row filtering;
    # assigning the Series directly would align on those labels and attach
    # wrong or NaN TestIDs to the rows.
    test_ids = merged['TestID'].astype(int).to_numpy()
    cal_windows['TestID'] = test_ids
    sample_windows['TestID'] = test_ids

    return cal_windows.set_index('TestID'), sample_windows.set_index('TestID')


# System 1 - Sensor A
A1_cal_window, A1_sample_window = _extract_windows(A1_Merged)
# System 2 - Sensor A
A2_cal_window, A2_sample_window = _extract_windows(A2_Merged)
# System 1 - Sensor B
B1_cal_window, B1_sample_window = _extract_windows(B1_Merged)
# System 2 - Sensor B
B2_cal_window, B2_sample_window = _extract_windows(B2_Merged)
# Collect, for every extracted window table, the index labels of the rows
# that contain at least one NaN.
A1_cal_window_drop_index = A1_cal_window.loc[A1_cal_window.isna().any(axis=1)].index
A2_cal_window_drop_index = A2_cal_window.loc[A2_cal_window.isna().any(axis=1)].index
A1_sample_window_drop_index = A1_sample_window.loc[A1_sample_window.isna().any(axis=1)].index
A2_sample_window_drop_index = A2_sample_window.loc[A2_sample_window.isna().any(axis=1)].index
B1_cal_window_drop_index = B1_cal_window.loc[B1_cal_window.isna().any(axis=1)].index
B2_cal_window_drop_index = B2_cal_window.loc[B2_cal_window.isna().any(axis=1)].index
B1_sample_window_drop_index = B1_sample_window.loc[B1_sample_window.isna().any(axis=1)].index
B2_sample_window_drop_index = B2_sample_window.loc[B2_sample_window.isna().any(axis=1)].index

# Check if missing values in different windows is different
# (reported in the order A1, A2, B1, B2; calibration before sample).
for _cal_nan_ids, _sample_nan_ids in (
    (A1_cal_window_drop_index, A1_sample_window_drop_index),
    (A2_cal_window_drop_index, A2_sample_window_drop_index),
    (B1_cal_window_drop_index, B1_sample_window_drop_index),
    (B2_cal_window_drop_index, B2_sample_window_drop_index),
):
    print("The missing value in calibration window:", _cal_nan_ids)
    print("The missing value in sample window:", _sample_nan_ids)
The missing value in calibration window: Float64Index([], dtype='float64', name='TestID') The missing value in sample window: Float64Index([], dtype='float64', name='TestID') The missing value in calibration window: Int64Index([], dtype='int64', name='TestID') The missing value in sample window: Int64Index([3561566, 3561567, 3565465, 3565466], dtype='int64', name='TestID') The missing value in calibration window: Float64Index([], dtype='float64', name='TestID') The missing value in sample window: Float64Index([], dtype='float64', name='TestID') The missing value in calibration window: Float64Index([], dtype='float64', name='TestID') The missing value in sample window: Float64Index([], dtype='float64', name='TestID')
# Index the merged tables by TestID so they can be compared with the window tables.
A1_Merged = A1_Merged.set_index("TestID")
A2_Merged = A2_Merged.set_index("TestID")
B1_Merged = B1_Merged.set_index("TestID")
B2_Merged = B2_Merged.set_index("TestID")

# Report the TestIDs present in a merged table but absent from its window
# table. All eight reports are printed before anything is dropped.
for _merged, _window in (
    (A1_Merged, A1_cal_window),
    (A1_Merged, A1_sample_window),
    (A2_Merged, A2_cal_window),
    (A2_Merged, A2_sample_window),
    (B1_Merged, B1_cal_window),
    (B1_Merged, B1_sample_window),
    (B2_Merged, B2_cal_window),
    (B2_Merged, B2_sample_window),
):
    print("The problem indexes after extract the window are:", _merged.index.difference(_window.index))

# Remove those tests from the merged tables: first the ones missing from the
# calibration window, then, on the reduced table, the ones missing from the
# sample window.
_a1_kept = A1_Merged.drop(index=A1_Merged.index.difference(A1_cal_window.index))
A1_Merged = _a1_kept.drop(index=_a1_kept.index.difference(A1_sample_window.index))
_a2_kept = A2_Merged.drop(index=A2_Merged.index.difference(A2_cal_window.index))
A2_Merged = _a2_kept.drop(index=_a2_kept.index.difference(A2_sample_window.index))
_b1_kept = B1_Merged.drop(index=B1_Merged.index.difference(B1_cal_window.index))
B1_Merged = _b1_kept.drop(index=_b1_kept.index.difference(B1_sample_window.index))
_b2_kept = B2_Merged.drop(index=B2_Merged.index.difference(B2_cal_window.index))
B2_Merged = _b2_kept.drop(index=_b2_kept.index.difference(B2_sample_window.index))

# Discard window rows whose TestID index is NaN - Sensor A
A1_cal_window = A1_cal_window.loc[A1_cal_window.index.notna()]
A1_sample_window = A1_sample_window.loc[A1_sample_window.index.notna()]
A2_cal_window = A2_cal_window.loc[A2_cal_window.index.notna()]
A2_sample_window = A2_sample_window.loc[A2_sample_window.index.notna()]
# Discard window rows whose TestID index is NaN - Sensor B
B1_cal_window = B1_cal_window.loc[B1_cal_window.index.notna()]
B1_sample_window = B1_sample_window.loc[B1_sample_window.index.notna()]
B2_cal_window = B2_cal_window.loc[B2_cal_window.index.notna()]
B2_sample_window = B2_sample_window.loc[B2_sample_window.index.notna()]
The problem indexes after extract the window are: Int64Index([12470355, 12470361, 12470365, 12537663, 12539049, 12622570], dtype='int64', name='TestID') The problem indexes after extract the window are: Int64Index([12470355, 12470361, 12470365, 12537663, 12539049, 12622570], dtype='int64', name='TestID') The problem indexes after extract the window are: Int64Index([], dtype='int64', name='TestID') The problem indexes after extract the window are: Int64Index([], dtype='int64', name='TestID') The problem indexes after extract the window are: Int64Index([12622570], dtype='int64', name='TestID') The problem indexes after extract the window are: Int64Index([12622570], dtype='int64', name='TestID') The problem indexes after extract the window are: Int64Index([3518677, 3518678], dtype='int64', name='TestID') The problem indexes after extract the window are: Int64Index([3518677, 3518678], dtype='int64', name='TestID')
# Shape of the subsets of time series after the extraction from the windows
print('Shape of the time series after extraction')
for _label, _frame in (
    # Cal Window
    ('A1_cal_window: ', A1_cal_window),
    ('A2_cal_window: ', A2_cal_window),
    ('B1_cal_window: ', B1_cal_window),
    ('B2_cal_window: ', B2_cal_window),
    # Sample Window
    ('A1_sample_window: ', A1_sample_window),
    ('A2_sample_window: ', A2_sample_window),
    ('B1_sample_window: ', B1_sample_window),
    ('B2_sample_window: ', B2_sample_window),
):
    print(_label, _frame.shape)
# The unmatched indexes could be deleted, but it is not necessary.
Shape of the time series after extraction A1_cal_window: (3368, 41) A2_cal_window: (7743, 41) B1_cal_window: (3373, 91) B2_cal_window: (7741, 91) A1_sample_window: (3368, 26) A2_sample_window: (7743, 26) B1_sample_window: (3373, 21) B2_sample_window: (7741, 21)
# Zero-align every extracted window (calibration windows first, then sample
# windows; same call order as before).
(
    A1_cal_window_zero,
    A2_cal_window_zero,
    B1_cal_window_zero,
    B2_cal_window_zero,
) = (
    align_to_zero(_window)
    for _window in (A1_cal_window, A2_cal_window, B1_cal_window, B2_cal_window)
)
(
    A1_sample_window_zero,
    A2_sample_window_zero,
    B1_sample_window_zero,
    B2_sample_window_zero,
) = (
    align_to_zero(_window)
    for _window in (A1_sample_window, A2_sample_window, B1_sample_window, B2_sample_window)
)

# Combine data: Merge the zero-aligned time series with "FluidType",
# "AgeOfCardInDaysAtTimeOfTest", "Fluid_Temperature_Filled", "FluidTypeBin",
# "CardAgeBin", "FluidTempBin"
(
    A1_cal_window_combine,
    A2_cal_window_combine,
    B1_cal_window_combine,
    B2_cal_window_combine,
) = (
    Merge_data(_zeroed, _merged)
    for _zeroed, _merged in (
        (A1_cal_window_zero, A1_Merged),
        (A2_cal_window_zero, A2_Merged),
        (B1_cal_window_zero, B1_Merged),
        (B2_cal_window_zero, B2_Merged),
    )
)
## Sample window
(
    A1_sample_window_combine,
    A2_sample_window_combine,
    B1_sample_window_combine,
    B2_sample_window_combine,
) = (
    Merge_data(_zeroed, _merged)
    for _zeroed, _merged in (
        (A1_sample_window_zero, A1_Merged),
        (A2_sample_window_zero, A2_Merged),
        (B1_sample_window_zero, B1_Merged),
        (B2_sample_window_zero, B2_Merged),
    )
)

# Balance the two systems on "CardAgeBin", using the Sensor A calibration
# tables as the reference; the returned indexes are reused for every table of
# the corresponding system.
System1_Index, System2_Index = balance_index(
    A1_cal_window_combine,
    A2_cal_window_combine,
    "CardAgeBin",
)
System1 Sensor A & B distribution: [0-9] 142 (9-28] 142 (28-56] 142 (56-84] 142 (84-112] 142 (112-140] 142 (140-168] 142 (168-196] 142 (196-224] 142 (224-252] 142 Name: CardAgeBin, dtype: int64 System2 Sensor A & B distribution: [0-9] 142 (9-28] 142 (28-56] 142 (56-84] 142 (84-112] 142 (112-140] 142 (140-168] 142 (168-196] 142 (196-224] 142 (224-252] 142 Name: CardAgeBin, dtype: int64
# Balanced data: restrict every combined table to the balanced index of its system.
(
    A1_cal_window_combine_balanced,
    A1_sample_window_combine_balanced,
    B1_cal_window_combine_balanced,
    B1_sample_window_combine_balanced,
) = (
    _combined.loc[System1_Index]
    for _combined in (
        A1_cal_window_combine,
        A1_sample_window_combine,
        B1_cal_window_combine,
        B1_sample_window_combine,
    )
)
(
    A2_cal_window_combine_balanced,
    A2_sample_window_combine_balanced,
    B2_cal_window_combine_balanced,
    B2_sample_window_combine_balanced,
) = (
    _combined.loc[System2_Index]
    for _combined in (
        A2_cal_window_combine,
        A2_sample_window_combine,
        B2_cal_window_combine,
        B2_sample_window_combine,
    )
)

# Plot all the balanced time series from the window extraction - Sensor A
plot_all_time_series_in_group(
    A1_cal_window_combine_balanced,
    A1_sample_window_combine_balanced,
    A2_cal_window_combine_balanced,
    A2_sample_window_combine_balanced,
    "CardAgeBin",
    "System 1A - CalWindow",
    "System 1A - SampleWindow",
    "System 2A - CalWindow",
    "System 2A - SampleWindow",
)
# Plot all the balanced time series from the window extraction - Sensor B
plot_all_time_series_in_group(
    B1_cal_window_combine_balanced,
    B1_sample_window_combine_balanced,
    B2_cal_window_combine_balanced,
    B2_sample_window_combine_balanced,
    "CardAgeBin",
    "System 1B - CalWindow",
    "System 1B - SampleWindow",
    "System 2B - CalWindow",
    "System 2B - SampleWindow",
)
The following section introduces the functional PCA (FPCA) analysis of each sensor and window.
# Sensor A, calibration window: FPCA on both systems, bootstrap, PC-score plots.
# The trailing six columns are excluded from the FPCA input (presumably the six
# metadata columns appended by Merge_data - confirm).
_s1_curves = A1_cal_window_combine_balanced.iloc[:, :-6]
_s2_curves = A2_cal_window_combine_balanced.iloc[:, :-6]
(
    pc_scores_s1_A_cal_window,
    pc_scores_s2_A_cal_window,
    fpca_s1_A_cal_window,
    fpca_s2_A_cal_window,
) = fpca_two_inputs(
    _s1_curves,
    _s2_curves,
    color_fpc1_s1='tab:blue',
    color_fpc2_s1='tab:cyan',
    color_fpc1_s2='tab:orange',
    color_fpc2_s2='gold',
)
print("--------------------------------------------------- Bootstrap -------------------------------------------------------------------------------------------")
ac1, ac2 = bootstrap(
    A1_cal_window_combine_balanced,
    A2_cal_window_combine_balanced,
    "A",
    "cal_window",
    features="CardAgeBin",
)
print("--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------")
create_pc_scores_plots(
    pc_scores_s1_A_cal_window,
    pc_scores_s2_A_cal_window,
    A1_cal_window_combine_balanced,
    A2_cal_window_combine_balanced,
    features="CardAgeBin",
)
S1 Explain variance PC1 (%): 99.87217788257932 S1 Explain variance PC2 (%): 0.03137443830513182 S2 Explain variance PC1 (%): 99.92034619403653 S2 Explain variance PC2 (%): 0.021775740650383443 The time series contributing most to PC1 is at index 800 with TestID 12529762.0 The time series contributing most to PC2 is at index 82 with TestID 12615989.0 The time series contributing most to PC1 is at index 91 with TestID 3568638 The time series contributing most to PC2 is at index 19 with TestID 3559978
--------------------------------------------------- Bootstrap ------------------------------------------------------------------------------------------- Confidence Interval of 1st component The number of sampling is 142 The boxplot of 1st Component
--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------
# Sensor A, sample window: FPCA on both systems, bootstrap, PC-score plots.
# The trailing six columns are excluded from the FPCA input (presumably the six
# metadata columns appended by Merge_data - confirm).
_s1_curves = A1_sample_window_combine_balanced.iloc[:, :-6]
_s2_curves = A2_sample_window_combine_balanced.iloc[:, :-6]
(
    pc_scores_s1_A_sample_window,
    pc_scores_s2_A_sample_window,
    fpca_s1_A_sample_window,
    fpca_s2_A_sample_window,
) = fpca_two_inputs(
    _s1_curves,
    _s2_curves,
    color_fpc1_s1='tab:blue',
    color_fpc2_s1='tab:cyan',
    color_fpc1_s2='tab:orange',
    color_fpc2_s2='gold',
)
print("--------------------------------------------------- Bootstrap -------------------------------------------------------------------------------------------")
as1, as2 = bootstrap(
    A1_sample_window_combine_balanced,
    A2_sample_window_combine_balanced,
    "A",
    "sample_window",
    features="CardAgeBin",
)
print("--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------")
create_pc_scores_plots(
    pc_scores_s1_A_sample_window,
    pc_scores_s2_A_sample_window,
    A1_sample_window_combine_balanced,
    A2_sample_window_combine_balanced,
    features="CardAgeBin",
)
S1 Explain variance PC1 (%): 99.54001643310667 S1 Explain variance PC2 (%): 0.13376186892582556 S2 Explain variance PC1 (%): 99.8564940756743 S2 Explain variance PC2 (%): 0.044519141846876005 The time series contributing most to PC1 is at index 800 with TestID 12529762.0 The time series contributing most to PC2 is at index 261 with TestID 12515884.0 The time series contributing most to PC1 is at index 140 with TestID 3568703 The time series contributing most to PC2 is at index 682 with TestID 3561428
--------------------------------------------------- Bootstrap ------------------------------------------------------------------------------------------- Confidence Interval of 1st component The number of sampling is 142 The boxplot of 1st Component
--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------
# Sensor B, calibration window: FPCA on both systems, bootstrap, PC-score plots.
# The trailing six columns are excluded from the FPCA input (presumably the six
# metadata columns appended by Merge_data - confirm).
_s1_curves = B1_cal_window_combine_balanced.iloc[:, :-6]
_s2_curves = B2_cal_window_combine_balanced.iloc[:, :-6]
(
    pc_scores_s1_B_cal_window,
    pc_scores_s2_B_cal_window,
    fpca_s1_B_cal_window,
    fpca_s2_B_cal_window,
) = fpca_two_inputs(
    _s1_curves,
    _s2_curves,
    color_fpc1_s1='tab:blue',
    color_fpc2_s1='tab:cyan',
    color_fpc1_s2='tab:orange',
    color_fpc2_s2='gold',
)
print("--------------------------------------------------- Bootstrap -------------------------------------------------------------------------------------------")
bc1, bc2 = bootstrap(
    B1_cal_window_combine_balanced,
    B2_cal_window_combine_balanced,
    "B",
    "cal_window",
    features="CardAgeBin",
)
print("--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------")
create_pc_scores_plots(
    pc_scores_s1_B_cal_window,
    pc_scores_s2_B_cal_window,
    B1_cal_window_combine_balanced,
    B2_cal_window_combine_balanced,
    features="CardAgeBin",
)
S1 Explain variance PC1 (%): 99.85065134319608 S1 Explain variance PC2 (%): 0.08925385168183515 S2 Explain variance PC1 (%): 99.80269180777617 S2 Explain variance PC2 (%): 0.10457640689465807 The time series contributing most to PC1 is at index 82 with TestID 12615989.0 The time series contributing most to PC2 is at index 664 with TestID 12371094.0 The time series contributing most to PC1 is at index 53 with TestID 3565690.0 The time series contributing most to PC2 is at index 53 with TestID 3565690.0
--------------------------------------------------- Bootstrap ------------------------------------------------------------------------------------------- Confidence Interval of 1st component The number of sampling is 142 The boxplot of 1st Component
--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------
# Sensor B, sample window: FPCA on both systems, bootstrap, PC-score plots.
# The trailing six columns are excluded from the FPCA input (presumably the six
# metadata columns appended by Merge_data - confirm).
_s1_curves = B1_sample_window_combine_balanced.iloc[:, :-6]
_s2_curves = B2_sample_window_combine_balanced.iloc[:, :-6]
(
    pc_scores_s1_B_sample_window,
    pc_scores_s2_B_sample_window,
    fpca_s1_B_sample_window,
    fpca_s2_B_sample_window,
) = fpca_two_inputs(
    _s1_curves,
    _s2_curves,
    color_fpc1_s1='tab:blue',
    color_fpc2_s1='tab:cyan',
    color_fpc1_s2='tab:orange',
    color_fpc2_s2='gold',
)
print("--------------------------------------------------- Bootstrap -------------------------------------------------------------------------------------------")
bs1, bs2 = bootstrap(
    B1_sample_window_combine_balanced,
    B2_sample_window_combine_balanced,
    "B",
    "sample_window",
    features="CardAgeBin",
)
print("--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------")
create_pc_scores_plots(
    pc_scores_s1_B_sample_window,
    pc_scores_s2_B_sample_window,
    B1_sample_window_combine_balanced,
    B2_sample_window_combine_balanced,
    features="CardAgeBin",
)
S1 Explain variance PC1 (%): 99.79199457851568 S1 Explain variance PC2 (%): 0.057099651147668865 S2 Explain variance PC1 (%): 99.82212927726086 S2 Explain variance PC2 (%): 0.06848572564066409 The time series contributing most to PC1 is at index 684 with TestID 12191141.0 The time series contributing most to PC2 is at index 103 with TestID 12581955.0 The time series contributing most to PC1 is at index 666 with TestID 3518710.0 The time series contributing most to PC2 is at index 60 with TestID 3548595.0
--------------------------------------------------- Bootstrap ------------------------------------------------------------------------------------------- Confidence Interval of 1st component The number of sampling is 142 The boxplot of 1st Component
--------------------------------------------------- PCA Scores -------------------------------------------------------------------------------------------
# Accumulates one record per analysed window.
df_list = []


def append_to_dataframe(window_name, slope1, slope2):
    """Append one slope record to the module-level ``df_list``.

    Despite its name, this function does not touch a DataFrame; it only adds
    a plain dict to the list.

    Parameters
    ----------
    window_name : value stored under the ``'Window'`` key.
    slope1 : value stored under the ``'Slope 1'`` key.
    slope2 : value stored under the ``'Slope 2'`` key.

    Returns
    -------
    None
    """
    # No ``global`` statement is needed: the list is mutated, never rebound.
    df_list.append({'Window': window_name, 'Slope 1': slope1, 'Slope 2': slope2})
# Run the regression visualisation for every sensor/window pair and record
# the two values it returns under the window's name.
for _window_name, _fpca_s1, _fpca_s2 in (
    ('A_cal_window', fpca_s1_A_cal_window, fpca_s2_A_cal_window),
    ('A_sample_window', fpca_s1_A_sample_window, fpca_s2_A_sample_window),
    ('B_cal_window', fpca_s1_B_cal_window, fpca_s2_B_cal_window),
    ('B_sample_window', fpca_s1_B_sample_window, fpca_s2_B_sample_window),
):
    _slope_1, _slope_2 = visualize_regression(_fpca_s1, _fpca_s2)
    append_to_dataframe(_window_name, _slope_1, _slope_2)
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 7.930e+05
Date: Sat, 15 Jun 2024 Prob (F-statistic): 1.09e-83
Time: 19:35:03 Log-Likelihood: 242.45
No. Observations: 40 AIC: -480.9
Df Residuals: 38 BIC: -477.5
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0058 0.000 30.975 0.000 0.005 0.006
x1 -0.0071 7.93e-06 -890.531 0.000 -0.007 -0.007
==============================================================================
Omnibus: 3.406 Durbin-Watson: 0.109
Prob(Omnibus): 0.182 Jarque-Bera (JB): 3.120
Skew: 0.618 Prob(JB): 0.210
Kurtosis: 2.415 Cond. No. 48.0
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 9.841e+05
Date: Sat, 15 Jun 2024 Prob (F-statistic): 1.81e-85
Time: 19:35:03 Log-Likelihood: 246.71
No. Observations: 40 AIC: -489.4
Df Residuals: 38 BIC: -486.0
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0060 0.000 35.935 0.000 0.006 0.006
x1 -0.0071 7.13e-06 -992.033 0.000 -0.007 -0.007
==============================================================================
Omnibus: 4.379 Durbin-Watson: 0.152
Prob(Omnibus): 0.112 Jarque-Bera (JB): 4.143
Skew: 0.747 Prob(JB): 0.126
Kurtosis: 2.493 Cond. No. 48.0
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 5.446e+05
Date: Sat, 15 Jun 2024 Prob (F-statistic): 8.16e-52
Time: 19:35:03 Log-Likelihood: 146.38
No. Observations: 25 AIC: -288.8
Df Residuals: 23 BIC: -286.3
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0157 0.000 52.697 0.000 0.015 0.016
x1 -0.0148 2e-05 -737.942 0.000 -0.015 -0.015
==============================================================================
Omnibus: 0.529 Durbin-Watson: 0.661
Prob(Omnibus): 0.768 Jarque-Bera (JB): 0.461
Skew: -0.296 Prob(JB): 0.794
Kurtosis: 2.698 Cond. No. 30.8
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 2.177e+06
Date: Sat, 15 Jun 2024 Prob (F-statistic): 9.79e-59
Time: 19:35:03 Log-Likelihood: 163.89
No. Observations: 25 AIC: -323.8
Df Residuals: 23 BIC: -321.3
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0139 0.000 94.273 0.000 0.014 0.014
x1 -0.0147 9.95e-06 -1475.467 0.000 -0.015 -0.015
==============================================================================
Omnibus: 4.495 Durbin-Watson: 0.405
Prob(Omnibus): 0.106 Jarque-Bera (JB): 3.450
Skew: 0.910 Prob(JB): 0.178
Kurtosis: 3.001 Cond. No. 30.8
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 2.793e+05
Date: Sat, 15 Jun 2024 Prob (F-statistic): 7.17e-156
Time: 19:35:03 Log-Likelihood: 499.94
No. Observations: 90 AIC: -995.9
Df Residuals: 88 BIC: -990.9
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const -0.0001 0.000 -0.508 0.613 -0.001 0.000
x1 0.0020 3.84e-06 528.450 0.000 0.002 0.002
==============================================================================
Omnibus: 13.947 Durbin-Watson: 0.016
Prob(Omnibus): 0.001 Jarque-Bera (JB): 8.997
Skew: -0.629 Prob(JB): 0.0111
Kurtosis: 2.097 Cond. No. 106.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 1.918e+05
Date: Sat, 15 Jun 2024 Prob (F-statistic): 1.07e-148
Time: 19:35:03 Log-Likelihood: 483.46
No. Observations: 90 AIC: -962.9
Df Residuals: 88 BIC: -957.9
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0005 0.000 1.926 0.057 -1.49e-05 0.001
x1 0.0020 4.61e-06 437.980 0.000 0.002 0.002
==============================================================================
Omnibus: 10.984 Durbin-Watson: 0.011
Prob(Omnibus): 0.004 Jarque-Bera (JB): 9.660
Skew: -0.717 Prob(JB): 0.00799
Kurtosis: 2.277 Cond. No. 106.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.999
Model: OLS Adj. R-squared: 0.999
Method: Least Squares F-statistic: 1.747e+04
Date: Sat, 15 Jun 2024 Prob (F-statistic): 2.40e-28
Time: 19:35:03 Log-Likelihood: 83.320
No. Observations: 20 AIC: -162.6
Df Residuals: 18 BIC: -160.6
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0128 0.002 6.937 0.000 0.009 0.017
x1 -0.0203 0.000 -132.177 0.000 -0.021 -0.020
==============================================================================
Omnibus: 2.319 Durbin-Watson: 0.138
Prob(Omnibus): 0.314 Jarque-Bera (JB): 1.832
Skew: 0.609 Prob(JB): 0.400
Kurtosis: 2.154 Cond. No. 25.0
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
OLS Regression Results
==============================================================================
Dep. Variable: y R-squared: 0.999
Model: OLS Adj. R-squared: 0.999
Method: Least Squares F-statistic: 2.826e+04
Date: Sat, 15 Jun 2024 Prob (F-statistic): 3.18e-30
Time: 19:35:03 Log-Likelihood: 87.997
No. Observations: 20 AIC: -172.0
Df Residuals: 18 BIC: -170.0
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0145 0.001 9.951 0.000 0.011 0.018
x1 -0.0204 0.000 -168.104 0.000 -0.021 -0.020
==============================================================================
Omnibus: 2.754 Durbin-Watson: 0.144
Prob(Omnibus): 0.252 Jarque-Bera (JB): 2.128
Skew: 0.668 Prob(JB): 0.345
Kurtosis: 2.125 Cond. No. 25.0
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Gather the per-window records accumulated in df_list into a single table.
slopes_df = pd.DataFrame(data=df_list)
# Bare expression: lets the notebook render the table as the cell output.
slopes_df
| | Window | Slope 1 | Slope 2 |
|---|---|---|---|
| 0 | A_cal_window | -0.007061 | -0.007070 |
| 1 | A_sample_window | -0.014793 | -0.014682 |
| 2 | B_cal_window | 0.002030 | 0.002020 |
| 3 | B_sample_window | -0.020282 | -0.020416 |
Functional regression is another functional data analysis (FDA) method. Unlike FPCA, the following analysis uses the entire time series from the balanced and centered dataset as the response variable and regresses it on the features grouped by bins. The goal is to determine whether the two systems behave differently under the influence of those features.
The cells below print the coefficients estimated by each model: the intercept function and the coefficient function of each feature.
# Sensor A, calibration window: fit one functional regression per system with
# card age as the only feature, printing each model summary under a header.
# NOTE(review): the second argument (40) presumably is the window length; the
# printed basis reports domain_range (0, 39) and n_basis=41 — confirm in
# Function_regression.
# NOTE(review): the last few printed coefficients are ~1e11–1e13 in magnitude,
# which looks like an ill-conditioned basis fit — verify before interpreting.
print("System 1:")
A1_cal_window_funct_reg = Function_regression(
    A1_cal_window_combine_balanced,
    40,
    ['AgeOfCardInDaysAtTimeOfTest']
)

print("----------------------------------------------------------------------------")
print("\n", "System 2:")
A2_cal_window_funct_reg = Function_regression(
    A2_cal_window_combine_balanced,
    40,
    ['AgeOfCardInDaysAtTimeOfTest']
)
System 1:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 39.0),), n_basis=41, period=39.0),
coefficients=[[ 5.44831087e-01 -2.48517680e-01 -3.73647104e-02 -1.03891504e-01
-2.58439000e-02 -5.99697051e-02 -2.65371333e-03 -7.00853023e-02
9.57908317e-03 -5.51903945e-02 8.00488064e-03 -8.65443174e-02
-8.70556984e-03 -3.42938125e-02 -3.66511518e-02 -3.07548887e-02
4.96307464e-04 -5.53743332e-02 -4.09042855e-02 -2.42214352e-02
-8.89843291e-03 -4.75685312e-02 4.60845094e-03 -2.92784615e-02
-1.39147659e-02 -3.74305179e-02 -4.76644256e-02 -3.03830126e-02
-2.91756320e-03 -3.78845803e-02 6.71415720e-03 -9.21303261e-02
-1.40742815e-02 -1.01446250e-01 -8.15209354e-03 -1.64676788e-01
-8.51399471e-03 -2.53666543e+13 2.99952283e+11 -2.53666543e+13
-2.99952283e+11]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 39.0),), n_basis=41, period=39.0),
coefficients=[[ 1.18738419e-02 -5.55202998e-03 -7.12022128e-04 -2.01225702e-03
-6.61002740e-04 -1.27536169e-03 1.30976241e-04 -9.66391188e-04
3.46259348e-04 -1.14881492e-03 1.55435324e-04 -8.70642605e-04
-1.69066303e-04 -8.00293848e-04 2.84906358e-05 -6.45470177e-04
-8.68556409e-05 -4.06860407e-04 3.85114633e-04 -9.96990616e-04
2.53289787e-04 -8.67523746e-04 1.00469545e-04 -8.74869444e-04
-8.45185813e-05 -6.43713834e-04 5.97328159e-05 -9.55442707e-04
3.00038413e-05 -1.11627423e-03 5.11032860e-04 -1.56760635e-03
4.87036834e-04 -2.56032343e-03 -1.89085071e-04 -3.50338940e-03
-1.85204082e-04 -5.35453548e+11 6.33156082e+09 -5.35453548e+11
-6.33156082e+09]])
----------------------------------------------------------------------------
System 2:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 39.0),), n_basis=41, period=39.0),
coefficients=[[ 4.68615381e-01 -2.19591691e-01 -1.78570335e-02 -1.30005160e-01
-2.24575188e-02 -6.42971927e-02 8.20466636e-04 -5.81813804e-02
9.87510412e-04 -2.61558238e-02 7.97775678e-03 -5.68178099e-02
-2.11384013e-02 -3.80944312e-02 -1.46725451e-02 1.03028457e-02
1.43074732e-02 3.59528923e-02 -8.35130055e-04 -4.75814966e-02
-8.43368123e-03 -8.29845658e-02 -1.42018625e-02 -3.95757938e-02
-2.47179226e-02 6.59393170e-03 -1.00845219e-02 -1.32308668e-02
-1.35954109e-02 -5.26166267e-02 1.35792143e-02 -6.50422784e-02
1.11905779e-02 -1.21579718e-01 3.21411298e-03 -1.66447700e-01
-4.85749927e-02 -2.40966324e+13 2.84934695e+11 -2.40966324e+13
-2.84934695e+11]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 39.0),), n_basis=41, period=39.0),
coefficients=[[ 1.48421852e-02 -7.01713619e-03 -8.34447482e-04 -2.36288393e-03
-7.77385013e-04 -1.62171202e-03 1.55914383e-04 -1.35629480e-03
4.71504150e-04 -1.55807181e-03 1.68065485e-04 -1.37269565e-03
-1.29306903e-04 -1.06617184e-03 -1.82270382e-04 -1.13324872e-03
-1.61785627e-04 -1.20877710e-03 8.24377372e-05 -1.01623463e-03
2.78712624e-04 -7.63228642e-04 2.66395179e-04 -1.01085528e-03
-1.03401869e-04 -1.20225806e-03 -2.27625776e-04 -1.22987860e-03
1.24373564e-04 -1.34457428e-03 5.07889584e-04 -2.05971735e-03
3.81209905e-04 -2.93934206e-03 -2.78102780e-04 -4.25908173e-03
1.29165366e-04 -6.62762073e+11 7.83694195e+09 -6.62762073e+11
-7.83694195e+09]])
# Sensor A, sample window: fit one functional regression per system with card
# age as the only feature, printing each model summary under a header.
# NOTE(review): the second argument (25) presumably is the window length; the
# printed basis reports domain_range (0, 24) and n_basis=25 — confirm in
# Function_regression.
# NOTE(review): one printed coefficient per model is ~1e12–1e14 in magnitude,
# which looks like an ill-conditioned basis fit — verify before interpreting.
print("System 1:")
A1_sample_window_funct_reg = Function_regression(
    A1_sample_window_combine_balanced,
    25,
    ["AgeOfCardInDaysAtTimeOfTest"]
)

print("----------------------------------------------------------------------------")
print("\n", "System 2:")
A2_sample_window_funct_reg = Function_regression(
    A2_sample_window_combine_balanced,
    25,
    ["AgeOfCardInDaysAtTimeOfTest"]
)
System 1:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 24.0),), n_basis=25, period=24.0),
coefficients=[[-3.47405267e-01 9.45639274e-02 6.96805443e-02 -3.41192074e-02
1.35924638e-02 -7.74780484e-02 -4.10167328e-02 -1.16125792e-02
-9.53707165e-02 -7.81899937e-02 -2.43494384e-01 2.55393357e-01
-8.78382363e-02 -1.03298321e-02 1.69542022e-02 1.00396395e-01
9.25932670e-03 9.91599491e-02 5.82284577e-02 8.11088770e-02
1.22798698e-01 2.13497360e-01 3.75288394e-02 1.19839537e+14
2.30508497e-01]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 24.0),), n_basis=25, period=24.0),
coefficients=[[ 3.83486978e-03 -1.28794880e-03 -7.82546285e-04 1.31167284e-04
-1.02832165e-04 9.68728150e-04 5.59264870e-04 -8.66213788e-05
1.10147068e-03 1.00261966e-03 3.26236614e-03 -3.66247700e-03
1.07275973e-03 -1.58717448e-04 -4.67277915e-04 -1.49344973e-03
-5.33692231e-06 -1.39933620e-03 -6.89587321e-04 -1.52795476e-03
-1.40133434e-03 -2.67820855e-03 -7.79961720e-04 -1.69909044e+12
-3.25960820e-03]])
----------------------------------------------------------------------------
System 2:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 24.0),), n_basis=25, period=24.0),
coefficients=[[ 9.07166314e-02 -3.17874610e-03 1.55562213e-03 -1.55529228e-02
1.46266266e-02 1.20753237e-02 5.41150525e-05 -3.79117379e-02
8.34520799e-02 -1.36967203e-02 -6.81734026e-02 -1.02799124e-01
9.90685193e-02 -9.30113185e-02 -1.22224155e-01 -3.28762118e-02
1.82506458e-02 -1.11904400e-01 -8.04290752e-02 -4.71767776e-02
4.22758208e-02 5.42534641e-02 9.53950336e-02 -4.16259972e+13
-5.87518425e-02]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 24.0),), n_basis=25, period=24.0),
coefficients=[[ 5.48517334e-03 -1.87849634e-03 -1.61824249e-03 -2.79809980e-04
-4.22071976e-04 1.13783745e-03 7.30599286e-04 7.20357781e-05
9.15312377e-04 1.62904237e-03 5.60346104e-03 -5.80752918e-03
7.07481367e-04 -3.17015916e-04 -4.58440769e-04 -2.59246398e-03
3.90535499e-04 -1.66153850e-03 -2.18474463e-04 -3.12862038e-03
-2.21261125e-03 -4.36495940e-03 -2.48727385e-03 -2.59175088e+12
-5.10115692e-03]])
# Sensor B, calibration window: fit one functional regression per system with
# card age as the only feature, printing each model summary under a header.
# NOTE(review): the second argument (90) presumably is the window length; the
# printed basis reports domain_range (0, 89) and n_basis=91 — confirm in
# Function_regression.
# NOTE(review): the last few printed coefficients are ~1e7–1e14 in magnitude,
# which looks like an ill-conditioned basis fit — verify before interpreting.
print("System 1:")
B1_cal_window_funct_reg = Function_regression(
    B1_cal_window_combine_balanced,
    90,
    ["AgeOfCardInDaysAtTimeOfTest"]
)

print("----------------------------------------------------------------------------")
print("\n", "System 2:")
B2_cal_window_funct_reg = Function_regression(
    B2_cal_window_combine_balanced,
    90,
    ["AgeOfCardInDaysAtTimeOfTest"]
)
System 1:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 89.0),), n_basis=91, period=89.0),
coefficients=[[ 1.41944188e+01 -6.18075643e+00 -1.30919887e+00 -2.39368608e+00
-1.83134625e-01 -2.36676515e+00 3.97693796e-01 -1.41883728e+00
-5.40517823e-01 -4.95265371e-01 -4.54411238e-01 -3.50507286e-01
1.08951474e+00 -7.64794470e-01 6.79133545e-01 -8.38301554e-01
6.22457016e-02 -5.87978400e-01 4.96112543e-01 -8.72747297e-01
1.05314002e+00 -1.32193016e+00 6.16687582e-01 -1.60670875e+00
7.05988157e-02 -1.82184111e+00 9.95172623e-02 -1.30109812e+00
-5.05472751e-01 -1.13295633e+00 -5.26193256e-01 -9.51463359e-01
-9.00683786e-01 -8.53833431e-01 -3.43621093e-01 -1.19133676e+00
-1.02012295e+00 2.68934625e-01 -1.66048368e+00 4.30706708e-01
-4.55529504e-01 -4.28099111e-01 -1.92364062e-01 -5.01679667e-01
-7.34823350e-01 -9.58270947e-02 -5.64465162e-01 -9.19193356e-02
-5.62647501e-01 4.29272744e-01 -7.63253088e-01 8.70708624e-01
-6.69852477e-01 4.53599282e-01 7.00751391e-02 3.40961395e-01
2.66664619e-01 2.16600466e-01 4.13542503e-01 1.40006634e-01
2.69372763e-01 -5.17469534e-01 2.13189244e-01 -8.36191263e-02
6.51723684e-01 -3.10098693e-01 4.86352995e-01 -3.90751186e-01
2.03664818e-02 -8.39404382e-01 -3.88608894e-02 -7.22326990e-01
4.32829163e-01 -6.49650857e-01 4.71450017e-01 -6.61520497e-01
1.86597075e-01 -1.86009072e+00 -2.73574183e-01 -1.42965988e+00
-5.07028944e-01 -2.38608382e-01 -6.04082448e-01 -1.28647030e+00
-3.43943548e-01 -2.28631365e+00 1.43353149e-01 -4.88518865e+14
4.14401855e+11 -4.88518865e+14 -4.14401855e+11]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 89.0),), n_basis=91, period=89.0),
coefficients=[[ 2.77308778e-03 -3.02275603e-04 1.13051320e-04 -4.07613079e-05
-1.83789224e-04 6.55023869e-04 4.78400511e-04 -7.73109568e-04
-1.68673965e-04 3.44093887e-04 -7.41888422e-04 -1.31276533e-03
2.02504134e-04 -1.59577018e-03 3.23105442e-04 -2.06541984e-04
-5.44847302e-04 -1.24398518e-03 1.36200440e-04 -1.92718742e-03
6.19120804e-05 -4.25941449e-04 -7.14207934e-04 1.38951279e-03
7.43407211e-04 1.44489427e-04 6.43915403e-04 -1.21858565e-03
1.38392509e-04 -6.86304157e-04 -6.51953881e-04 6.96336843e-04
-1.03930699e-04 -6.23372649e-04 -1.63625061e-04 -9.81770329e-04
-4.27855658e-04 -1.54221357e-03 6.06777390e-04 -3.17103293e-04
5.58214446e-04 -6.74405462e-05 -4.07873680e-05 -4.45098304e-04
-7.48285019e-04 5.16660497e-04 -1.01901838e-03 4.29369174e-05
-7.59435042e-04 -1.29698468e-03 2.82353057e-04 2.34173143e-04
-6.38391899e-05 4.20394202e-04 3.49486815e-04 -1.07113687e-04
-1.25112525e-04 1.15613800e-03 -1.17631525e-05 -2.57567907e-04
7.45550452e-04 1.04591972e-03 6.70687354e-04 -4.19236563e-04
-4.60616753e-04 -8.89591011e-04 3.40731861e-04 -1.79128094e-04
7.13527816e-04 -1.77897435e-05 9.91017163e-04 2.46021428e-04
-7.41851710e-04 -2.01694303e-04 -3.78495391e-04 -1.82882975e-03
-6.56182562e-04 1.15475275e-03 -8.57988004e-04 7.61039968e-04
-7.86394694e-04 -3.98487928e-04 6.13705975e-04 -1.06347249e-03
7.79397035e-04 -1.35696275e-03 4.05315476e-04 -9.07723879e+10
7.70006004e+07 -9.07723879e+10 -7.70006004e+07]])
----------------------------------------------------------------------------
System 2:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 89.0),), n_basis=91, period=89.0),
coefficients=[[ 1.70220797e+01 -7.19127745e+00 -1.54318502e+00 -2.88152320e+00
-2.24960382e-01 -2.71696025e+00 4.84225276e-01 -1.73229884e+00
-7.00428803e-01 -5.42102861e-01 -5.58064428e-01 -6.70014920e-01
1.30836417e+00 -1.04383153e+00 8.42768075e-01 -8.97109417e-01
5.57841920e-02 -6.09096893e-01 4.63736110e-01 -1.54888402e+00
1.20249889e+00 -1.51773640e+00 7.10145290e-01 -2.34201972e+00
3.80733247e-02 -2.07000960e+00 1.93956285e-01 -1.85851212e+00
-5.52458581e-01 -1.50701180e+00 -6.79670680e-01 -9.73752994e-01
-9.65806985e-01 -1.05360529e+00 -4.29418776e-01 -1.65790364e+00
-1.28330770e+00 4.94340774e-02 -1.85420813e+00 4.78479514e-01
-4.13259446e-01 -4.30622947e-01 -2.16453579e-01 -7.84687267e-01
-6.42508753e-01 -8.63207867e-02 -7.90712417e-01 -1.25545597e-01
-7.87228328e-01 3.81728258e-01 -8.60564766e-01 1.04009754e+00
-7.32463881e-01 6.73209198e-01 7.77758716e-02 4.27044999e-01
2.19422011e-01 6.37568718e-01 4.30120157e-01 7.75134033e-02
4.13874324e-01 -5.25344234e-01 3.05677894e-01 -8.34795529e-02
6.17019130e-01 -3.93450726e-01 5.99288154e-01 -5.49932032e-01
8.77628959e-02 -1.17867582e+00 2.70726233e-01 -6.83447242e-01
3.77779178e-01 -8.93195390e-01 3.57469287e-01 -1.25442247e+00
1.05259231e-01 -2.06755664e+00 -5.51576746e-01 -1.26421852e+00
-6.07306651e-01 -4.27097482e-01 -5.86159215e-01 -1.56391461e+00
-4.16953698e-01 -2.82799564e+00 2.92068422e-01 -5.78530692e+14
4.90757285e+11 -5.78530692e+14 -4.90757285e+11]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 89.0),), n_basis=91, period=89.0),
coefficients=[[-4.10164572e-04 -2.59702305e-04 1.75877812e-04 7.67494189e-04
-9.29753346e-05 6.86989088e-04 3.65037850e-04 -2.40081661e-04
1.94921561e-04 -2.24116042e-04 -4.94552880e-04 2.03285956e-04
-4.04938834e-04 -6.00155721e-04 -1.41058692e-04 -7.51679039e-04
-9.35494709e-04 -2.11626689e-03 8.24394890e-04 2.04803953e-03
1.91722314e-04 -9.52807650e-04 -5.59418338e-04 5.30133606e-03
1.31995919e-03 5.81205997e-05 -1.53200881e-04 1.09612024e-03
9.41581802e-05 6.22255501e-04 -2.10547461e-04 -1.51154405e-04
-1.05650620e-03 -2.42021464e-05 -1.94924673e-04 1.35889611e-03
4.51400129e-04 3.89403516e-04 2.17975667e-04 -2.46304343e-04
-3.91277124e-05 -6.90644304e-04 9.29269663e-05 8.99796263e-04
-2.22035311e-03 2.60672118e-04 -3.05250845e-04 2.59962075e-04
1.16519803e-04 -6.02668261e-04 -1.57661601e-05 8.40648569e-05
-5.87776101e-04 -7.28208552e-04 4.76488045e-04 -4.09683025e-04
5.32912561e-04 -1.68732282e-03 3.49559307e-04 4.19811708e-04
8.58400541e-05 5.59706393e-04 8.72270266e-05 -2.62072669e-04
4.41100529e-04 -6.15105851e-04 -6.48097116e-05 6.69823584e-04
3.89626483e-04 1.40724023e-03 -1.31763754e-03 -7.68198348e-04
-7.61078893e-05 7.63366607e-04 8.34137794e-04 1.90730314e-03
5.20978925e-06 8.03917459e-04 5.22908523e-04 -2.33046592e-03
-7.53972689e-04 4.85380364e-04 -9.12570121e-05 -7.76734104e-04
6.22482322e-04 -7.21254735e-05 -1.12497863e-04 2.72234706e+10
-2.30931854e+07 2.72234706e+10 2.30931854e+07]])
# Sensor B, sample window: fit one functional regression per system with card
# age as the only feature, printing each model summary under a header.
# NOTE(review): the second argument (20) presumably is the window length; the
# printed basis reports domain_range (0, 19) and n_basis=21 — confirm in
# Function_regression.
# NOTE(review): the last few printed coefficients are ~1e8–1e14 in magnitude,
# which looks like an ill-conditioned basis fit — verify before interpreting.
print("System 1:")
B1_sample_window_funct_reg = Function_regression(
    B1_sample_window_combine_balanced,
    20,
    ["AgeOfCardInDaysAtTimeOfTest"]
)

print("----------------------------------------------------------------------------")
print("\n", "System 2:")
B2_sample_window_funct_reg = Function_regression(
    B2_sample_window_combine_balanced,
    20,
    ["AgeOfCardInDaysAtTimeOfTest"]
)
System 1:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 19.0),), n_basis=21, period=19.0),
coefficients=[[ 1.94477262e+00 -5.74698778e-01 4.94399705e-01 -1.88917790e-01
4.59279146e-01 -4.46228280e-01 4.80121074e-01 -3.09173927e-01
2.84898091e-01 -4.26428562e-01 4.60382637e-01 -1.77049198e-01
4.30580453e-01 -2.28338562e-01 2.39491499e-01 -3.79670013e-01
3.09234260e-01 -4.73380784e+14 2.33768288e+12 -4.73380784e+14
-2.33768288e+12]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 19.0),), n_basis=21, period=19.0),
coefficients=[[-7.09581020e-04 -8.68061993e-04 -3.74666503e-04 -8.59587528e-04
-3.89071731e-04 6.07273996e-04 -5.45706512e-04 6.85532296e-04
2.66439598e-04 7.82302116e-04 -1.00400795e-03 -1.08968462e-03
-6.22492728e-04 -9.25278571e-05 6.79326947e-04 -1.04001312e-03
6.94320739e-04 1.28710581e+11 -6.35607808e+08 1.28710581e+11
6.35607808e+08]])
----------------------------------------------------------------------------
System 2:
Model Summary:
Intercept: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 19.0),), n_basis=21, period=19.0),
coefficients=[[ 1.98901146e+00 -7.42027041e-01 1.77884389e-01 -3.37591842e-01
3.71614792e-01 -4.40190211e-01 6.55385015e-01 -1.19814662e-01
3.86821589e-01 -5.26560597e-01 1.10216880e-01 -2.69764412e-01
3.13151991e-01 -3.18267893e-01 2.44097276e-01 -5.67642831e-01
4.95133871e-01 -5.24144820e+14 2.58836948e+12 -5.24144820e+14
-2.58836948e+12]])
Coefficient of AgeOfCardInDaysAtTimeOfTest: FDataBasis(
_basis=FourierBasis(domain_range=((0.0, 19.0),), n_basis=21, period=19.0),
coefficients=[[ 2.36839893e-04 4.53369326e-05 2.25288678e-03 -3.34273709e-04
5.98659250e-04 4.55030227e-04 -1.70395673e-03 -5.81456586e-04
-4.06676935e-04 1.52526982e-03 1.89942111e-03 1.77456302e-05
6.31515897e-04 2.66933918e-04 8.38422756e-04 4.66257997e-04
-6.38571247e-04 1.98617969e+11 -9.80829477e+08 1.98617969e+11
9.80829477e+08]])
# Pass the system-1 and system-2 functional-regression results for each
# sensor/window pair to coefficent_visualization, together with the feature
# list, an evaluation range and a title.
# NOTE(review): every range stops short of the window length used for the fit
# (35 of 40, 22 of 25, 85 of 90, 15 of 20) — presumably to leave out the end
# of the window; confirm against coefficent_visualization.
coefficent_visualization(
    A1_cal_window_funct_reg,
    A2_cal_window_funct_reg,
    ["AgeOfCardInDaysAtTimeOfTest"],
    range(1, 36),
    "SensorA Cal window",
)
coefficent_visualization(
    A1_sample_window_funct_reg,
    A2_sample_window_funct_reg,
    ["AgeOfCardInDaysAtTimeOfTest"],
    range(1, 23),
    "SensorA sample window",
)
coefficent_visualization(
    B1_cal_window_funct_reg,
    B2_cal_window_funct_reg,
    ["AgeOfCardInDaysAtTimeOfTest"],
    range(1, 86),
    "SensorB Cal window",
)
coefficent_visualization(
    B1_sample_window_funct_reg,
    B2_sample_window_funct_reg,
    ["AgeOfCardInDaysAtTimeOfTest"],
    range(1, 16),
    "SensorB Sample window",
)